\documentclass[10pt,oneside]{book}

\input{macros_orig.tex}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{document}

% Use the "empty" page style (no running heads, no page numbers), both as the
% document-wide style and explicitly for the current page, so the page
% carries nothing but the pseudocode box below.
\pagestyle{empty}
\thispagestyle{empty}

% Pseudocode for the Q-Learning procedure, typeset with the codebox macros
% (\Procname, \li, \For, \While, \Do, \End, \Comment -- presumably provided
% via macros_orig.tex; TODO confirm).
%
% Arguments shown in the procedure header: state set S, action set A,
% initial state s_0, and the scalars gamma and alpha used in the update rule.
%
% Conventions used in this box:
%   * every \Do is closed by a matching \End so the indentation level is
%     back at zero when the codebox ends;
%   * assignment is written with \gets throughout.
\begin{codebox}
  \Procname{$\proc{Q-Learning}(\mathcal S, \mathcal A, s_0, \gamma,
                               \alpha)$}
  % Initialise the table entry of every (state, action) pair to zero.
  \li \For $s \in \mathcal{S}, a \in \mathcal{A}:$
  \li   \Do
          $Q[s, a] \gets 0$
        \End
  \li $s \gets s_0$ \Comment Or draw an $s$ randomly from $\mathcal S$
  % Main loop: as written it never terminates (condition is the constant True).
  \li \While True:
  \li   \Do
          $a \gets \text{select\_action}(s, Q)$
  \li     $r, s' \gets \text{execute}(a)$
  % Update: weight (1 - alpha) on the old entry, weight alpha on the reward
  % plus gamma times the largest table entry for the successor state s'.
  \li     $Q[s, a] \gets (1-\alpha)Q[s, a]
          + \alpha(r + \gamma \max_{a'}Q[s',a'])$
  \li     $s \gets s'$
        \End
\end{codebox}

\end{document}
